/*

****************************************************************************

Summary: Create a file with sentence information, including dates of sentences

****************************************************************************

*/

* ---- session settings ----
set more off
clear

* ---- directory globals ----
* Each global is a directory prefix that is prepended to file names
* wherever it is referenced; all of them are empty strings here.

* directory for storing file with demographic information for IDHS data
global nexpbase ""

* directory for storing demolition related files
global demo ""

* directory for temporary files created for this project
global temp ""

* directory for cross-walk file which contains IDs for the social assistance files
global xwalk_assist ""

* directory for IDES data
global labor ""

* directory for cross-walk file for IDES and Chapin files
global match_ui ""

* directory for geocoded address assistance data file
global geo ""

* directory for replication do-files
global dofiles ""

* directory for ISP data
global isp ""

* ---- stage switches: a stage runs only when its switch equals 1 ----
global step1 1
global step2 1

********************************************************************************
*** 1. Create a version of the analysis file that is unique at the 2006 ID level
********************************************************************************

if $step1==1 {
	
	* Purpose: build a lookup of treat_yr keyed on studyid (the 2006 ID,
	* chdhsid_200606), keeping only IDs with a single treat_yr value.
	* The result is saved to a tempfile that step 2 merges onto the
	* sentence records.
	
	gzuse "${demo}analysis_file_final.dta.gz", clear
	
	* obtain chdhsid_200606 from the birth-dates file
	* (created in "3_make_analysis_sample_final.do").
	* The using file is stored gzipped: decompress it, merge, recompress.
	* The merge is run under capture so that the file is recompressed even
	* when the merge fails; the merge error is then re-raised.
	!gunzip "${temp}temp_file3_bdates_chdhsid_201212.dta.gz"
	capture noisily merge n:1 chdhsid_201212 using "${temp}temp_file3_bdates_chdhsid_201212.dta", keep(1 3) keepusing(chdhsid_200606)
	local merge_rc = _rc
	!gzip "${temp}temp_file3_bdates_chdhsid_201212.dta"
	if `merge_rc' exit `merge_rc'
	drop _merge // 100% match rate
	
	rename chdhsid_200606 studyid
	
	* reduce to distinct (studyid, treat_yr) pairs
	keep studyid treat_yr
	duplicates drop
	
	* a studyid that still appears more than once has conflicting
	* treat_yr values; drop every such studyid so the file is unique on it
	duplicates tag studyid, gen(flag)
	
	drop if flag>0
	
	tempfile temp
	save `temp', replace
	 // file with treat_yr for all displaced persons
	
	}
	
********************************************************************************
*** 2. Process and merge imprisonment file
********************************************************************************

if $step2==1 {

	* Purpose: stack the sentence files, keep post-treatment sentences of
	* persons found in the step 1 lookup, derive sentence lengths and
	* yearly imprisonment indicators, and collapse to one row per studyid.
	* Output: temp_sentenced.dta in the temp directory.

	* Step 2 merges on the tempfile saved by step 1; stop with a clear
	* message if it is not available (for example step 1 was switched off).
	capture confirm file "`temp'"
	if _rc {
		display as error "step 2 requires the treat_yr tempfile saved by step 1; rerun with step1 set to 1"
		exit 601
	}

	* make a temporary file for the sentences
	* (paths are quoted so that a directory containing spaces still works)
	use "${isp}Michigan20120605bSentences.dta", clear
	
	des
	duplicates report

	append using "${isp}Michigan20120605cSentences.dta"
	append using "${isp}Michigan20120605dSentences.dta"
	append using "${isp}Michigan20120605eSentences.dta"
	append using "${isp}Michigan20120605Sentences.dta"

	duplicates report
	duplicates report StudyID
	 // individuals have multiple sentencing incidents

	rename StudyID studyid

	* sentence-level indicators, classified by substring match on SentDesc
	gen sentenced = 1 
	gen sentenced_nonfine = 1
	 replace sentenced_nonfine = 0 if regexm(SentDesc,"FINE")
	
	gen sentenced_imprison = 0
	 replace sentenced_imprison = 1 if regexm(SentDesc,"IMPRISON")
	 
	* daily date and calendar year of the sentence
	* NOTE(review): dofc() presumes SentDate is a Stata datetime (tc) value - confirm
	gen SentDate_td = dofc(SentDate)
	format SentDate_td %td
	gen year = year(SentDate_td)
	
	* obtain treat-year to drop all pre-treatment sentences
	merge n:1 studyid using `temp', keep(1 3) keepusing(treat_yr)
	
	* matched records: drop sentences dated in or before the treatment year
	drop if year<=treat_yr & _merge==3
	* unmatched records: person is not in the step 1 lookup
	drop if _merge==1
	
    * create sentence length var
	destring(slYears), replace
	destring(slMonths), replace
	destring(slDays), replace
	
	* replace missing values with 0
	replace slYears = 0 if missing(slYears)
	replace slMonths = 0 if missing(slMonths)
	replace slDays = 0 if missing(slDays)
	
	* approximate length in days (365 days per year, 30.5 days per month)
	gen sentence_length = slYears*365+slMonths*30.50+slDays
	
	* diagnostics: distribution of lengths and zero-length imprisonment records
	sum sentence_length, det
	count if sentence_length==0 & regexm(SentDesc,"IMPRISON")
	count if sentence_length~=0 & regexm(SentDesc,"IMPRISON")
	sum sentence_length if regexm(SentDesc,"IMPRISON"), det
	
	count if sentence_length==0 & regexm(SentDesc,"IMPRISONMENT-DOC")
	count if sentence_length~=0 & regexm(SentDesc,"IMPRISONMENT-DOC")
	
	*** create measures of incarceration by year
	
	* restrict sentences to jail/prison
	assert sentence_length~=. // never missing (but it can be 0)
	replace sentence_length=. if !regexm(SentDesc,"IMPRISON")
	
	* impute zero-length imprisonment sentences (assume average):
	* the summarize below leaves the mean of the positive imprisonment
	* lengths in r(mean), which the replace must read immediately after
	count if sentence_length==0 & regexm(SentDesc,"IMPRISON")
	sum sentence_length if regexm(SentDesc,"IMPRISON") & sentence_length>0
	replace sentence_length = r(mean) if sentence_length==0 & regexm(SentDesc,"IMPRISON")
	assert sentence_length~=0 if regexm(SentDesc,"IMPRISON")
	
	* release date = sentence date plus sentence length in days
	gen ReleaseDate_td = SentDate_td+sentence_length if regexm(SentDesc,"IMPRISON")
	format ReleaseDate_td %td
	
	* sentence year (years after 2012 are set to missing)
	gen SentYear = year(SentDate_td)
	replace SentYear = . if SentYear>2012 
	
	* release year
	gen ReleaseYear = year(ReleaseDate_td)
	assert ReleaseYear ~=. if regexm(SentDesc,"IMPRISON")
	
	* prison status in every year 2000-2011:
	* 1 if the year lies between sentence year and release year of an
	* imprisonment sentence, 0 otherwise, missing when SentYear is missing
	forvalues i=2000(1)2011 {
	 gen imprison_`i' = (`i' <= ReleaseYear & `i' >= SentYear & regexm(SentDesc,"IMPRISON")) if SentYear~=.
	 }
	
	* imprisonment sentence with a sentence year before 2008
	gen sentenced_imprison_b2008 = (sentenced_imprison==1 & SentYear<2008)
		 
	* flatten file to one row per studyid: indicators take their maximum
	* over a person's sentences, sentence_length is summed
	* (collapse (sum) yields 0 for a person whose lengths are all missing)
	collapse (max) sentenced sentenced_nonfine sentenced_imprison imprison_* sentenced_imprison_b2008 (sum) sentence_length, by(studyid)
		
	compress
	
	lab data "Sentenced x-sec file for demolition sample"
	
	save "${temp}temp_sentenced.dta", replace
	
}
